In [ ]:
import seaborn as sns
import matplotlib.pyplot as plt
In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
data = pd.read_csv(r"C:\Users\Anusha\OneDrive\Desktop\pokemon.csv")
data
Out[3]:
abilities against_bug against_dark against_dragon against_electric against_fairy against_fight against_fire against_flying against_ghost ... percentage_male pokedex_number sp_attack sp_defense speed type1 type2 weight_kg generation is_legendary
0 ['Overgrow', 'Chlorophyll'] 1.00 1.0 1.0 0.5 0.5 0.5 2.0 2.0 1.0 ... 88.1 1 65 65 45 grass poison 6.9 1 0
1 ['Overgrow', 'Chlorophyll'] 1.00 1.0 1.0 0.5 0.5 0.5 2.0 2.0 1.0 ... 88.1 2 80 80 60 grass poison 13.0 1 0
2 ['Overgrow', 'Chlorophyll'] 1.00 1.0 1.0 0.5 0.5 0.5 2.0 2.0 1.0 ... 88.1 3 122 120 80 grass poison 100.0 1 0
3 ['Blaze', 'Solar Power'] 0.50 1.0 1.0 1.0 0.5 1.0 0.5 1.0 1.0 ... 88.1 4 60 50 65 fire NaN 8.5 1 0
4 ['Blaze', 'Solar Power'] 0.50 1.0 1.0 1.0 0.5 1.0 0.5 1.0 1.0 ... 88.1 5 80 65 80 fire NaN 19.0 1 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
796 ['Beast Boost'] 0.25 1.0 0.5 2.0 0.5 1.0 2.0 0.5 1.0 ... NaN 797 107 101 61 steel flying 999.9 7 1
797 ['Beast Boost'] 1.00 1.0 0.5 0.5 0.5 2.0 4.0 1.0 1.0 ... NaN 798 59 31 109 grass steel 0.1 7 1
798 ['Beast Boost'] 2.00 0.5 2.0 0.5 4.0 2.0 0.5 1.0 0.5 ... NaN 799 97 53 43 dark dragon 888.0 7 1
799 ['Prism Armor'] 2.00 2.0 1.0 1.0 1.0 0.5 1.0 1.0 2.0 ... NaN 800 127 89 79 psychic NaN 230.0 7 1
800 ['Soul-Heart'] 0.25 0.5 0.0 1.0 0.5 1.0 2.0 0.5 1.0 ... NaN 801 130 115 65 steel fairy 80.5 7 1

801 rows × 41 columns

In [4]:
print(data.info())
print(data.isnull().sum())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 801 entries, 0 to 800
Data columns (total 41 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   abilities          801 non-null    object 
 1   against_bug        801 non-null    float64
 2   against_dark       801 non-null    float64
 3   against_dragon     801 non-null    float64
 4   against_electric   801 non-null    float64
 5   against_fairy      801 non-null    float64
 6   against_fight      801 non-null    float64
 7   against_fire       801 non-null    float64
 8   against_flying     801 non-null    float64
 9   against_ghost      801 non-null    float64
 10  against_grass      801 non-null    float64
 11  against_ground     801 non-null    float64
 12  against_ice        801 non-null    float64
 13  against_normal     801 non-null    float64
 14  against_poison     801 non-null    float64
 15  against_psychic    801 non-null    float64
 16  against_rock       801 non-null    float64
 17  against_steel      801 non-null    float64
 18  against_water      801 non-null    float64
 19  attack             801 non-null    int64  
 20  base_egg_steps     801 non-null    int64  
 21  base_happiness     801 non-null    int64  
 22  base_total         801 non-null    int64  
 23  capture_rate       801 non-null    object 
 24  classfication      801 non-null    object 
 25  defense            801 non-null    int64  
 26  experience_growth  801 non-null    int64  
 27  height_m           781 non-null    float64
 28  hp                 801 non-null    int64  
 29  japanese_name      801 non-null    object 
 30  name               801 non-null    object 
 31  percentage_male    703 non-null    float64
 32  pokedex_number     801 non-null    int64  
 33  sp_attack          801 non-null    int64  
 34  sp_defense         801 non-null    int64  
 35  speed              801 non-null    int64  
 36  type1              801 non-null    object 
 37  type2              417 non-null    object 
 38  weight_kg          781 non-null    float64
 39  generation         801 non-null    int64  
 40  is_legendary       801 non-null    int64  
dtypes: float64(21), int64(13), object(7)
memory usage: 256.7+ KB
None
abilities              0
against_bug            0
against_dark           0
against_dragon         0
against_electric       0
against_fairy          0
against_fight          0
against_fire           0
against_flying         0
against_ghost          0
against_grass          0
against_ground         0
against_ice            0
against_normal         0
against_poison         0
against_psychic        0
against_rock           0
against_steel          0
against_water          0
attack                 0
base_egg_steps         0
base_happiness         0
base_total             0
capture_rate           0
classfication          0
defense                0
experience_growth      0
height_m              20
hp                     0
japanese_name          0
name                   0
percentage_male       98
pokedex_number         0
sp_attack              0
sp_defense             0
speed                  0
type1                  0
type2                384
weight_kg             20
generation             0
is_legendary           0
dtype: int64
In [5]:
print(data.describe())
       against_bug  against_dark  against_dragon  against_electric  \
count   801.000000    801.000000      801.000000        801.000000   
mean      0.996255      1.057116        0.968789          1.073970   
std       0.597248      0.438142        0.353058          0.654962   
min       0.250000      0.250000        0.000000          0.000000   
25%       0.500000      1.000000        1.000000          0.500000   
50%       1.000000      1.000000        1.000000          1.000000   
75%       1.000000      1.000000        1.000000          1.000000   
max       4.000000      4.000000        2.000000          4.000000   

       against_fairy  against_fight  against_fire  against_flying  \
count     801.000000     801.000000    801.000000      801.000000   
mean        1.068976       1.065543      1.135456        1.192884   
std         0.522167       0.717251      0.691853        0.604488   
min         0.250000       0.000000      0.250000        0.250000   
25%         1.000000       0.500000      0.500000        1.000000   
50%         1.000000       1.000000      1.000000        1.000000   
75%         1.000000       1.000000      2.000000        1.000000   
max         4.000000       4.000000      4.000000        4.000000   

       against_ghost  against_grass  ...    height_m          hp  \
count     801.000000     801.000000  ...  781.000000  801.000000   
mean        0.985019       1.034020  ...    1.163892   68.958801   
std         0.558256       0.788896  ...    1.080326   26.576015   
min         0.000000       0.250000  ...    0.100000    1.000000   
25%         1.000000       0.500000  ...    0.600000   50.000000   
50%         1.000000       1.000000  ...    1.000000   65.000000   
75%         1.000000       1.000000  ...    1.500000   80.000000   
max         4.000000       4.000000  ...   14.500000  255.000000   

       percentage_male  pokedex_number   sp_attack  sp_defense       speed  \
count       703.000000      801.000000  801.000000  801.000000  801.000000   
mean         55.155761      401.000000   71.305868   70.911361   66.334582   
std          20.261623      231.373075   32.353826   27.942501   28.907662   
min           0.000000        1.000000   10.000000   20.000000    5.000000   
25%          50.000000      201.000000   45.000000   50.000000   45.000000   
50%          50.000000      401.000000   65.000000   66.000000   65.000000   
75%          50.000000      601.000000   91.000000   90.000000   85.000000   
max         100.000000      801.000000  194.000000  230.000000  180.000000   

        weight_kg  generation  is_legendary  
count  781.000000  801.000000    801.000000  
mean    61.378105    3.690387      0.087391  
std    109.354766    1.930420      0.282583  
min      0.100000    1.000000      0.000000  
25%      9.000000    2.000000      0.000000  
50%     27.300000    4.000000      0.000000  
75%     64.800000    5.000000      0.000000  
max    999.900000    7.000000      1.000000  

[8 rows x 34 columns]
In [6]:
data['type2'] = data['type2'].fillna('NIL')
In [7]:
data['percentage_male'] = data['percentage_male'].fillna(0)
In [8]:
data['weight_kg'] = data['weight_kg'].fillna(data['weight_kg'].mean())
In [9]:
data['height_m'] = data['height_m'].fillna(data['height_m'].mean())
In [10]:
print(data.isnull().sum())
abilities            0
against_bug          0
against_dark         0
against_dragon       0
against_electric     0
against_fairy        0
against_fight        0
against_fire         0
against_flying       0
against_ghost        0
against_grass        0
against_ground       0
against_ice          0
against_normal       0
against_poison       0
against_psychic      0
against_rock         0
against_steel        0
against_water        0
attack               0
base_egg_steps       0
base_happiness       0
base_total           0
capture_rate         0
classfication        0
defense              0
experience_growth    0
height_m             0
hp                   0
japanese_name        0
name                 0
percentage_male      0
pokedex_number       0
sp_attack            0
sp_defense           0
speed                0
type1                0
type2                0
weight_kg            0
generation           0
is_legendary         0
dtype: int64
In [11]:
print(data.describe())
       against_bug  against_dark  against_dragon  against_electric  \
count   801.000000    801.000000      801.000000        801.000000   
mean      0.996255      1.057116        0.968789          1.073970   
std       0.597248      0.438142        0.353058          0.654962   
min       0.250000      0.250000        0.000000          0.000000   
25%       0.500000      1.000000        1.000000          0.500000   
50%       1.000000      1.000000        1.000000          1.000000   
75%       1.000000      1.000000        1.000000          1.000000   
max       4.000000      4.000000        2.000000          4.000000   

       against_fairy  against_fight  against_fire  against_flying  \
count     801.000000     801.000000    801.000000      801.000000   
mean        1.068976       1.065543      1.135456        1.192884   
std         0.522167       0.717251      0.691853        0.604488   
min         0.250000       0.000000      0.250000        0.250000   
25%         1.000000       0.500000      0.500000        1.000000   
50%         1.000000       1.000000      1.000000        1.000000   
75%         1.000000       1.000000      2.000000        1.000000   
max         4.000000       4.000000      4.000000        4.000000   

       against_ghost  against_grass  ...    height_m          hp  \
count     801.000000     801.000000  ...  801.000000  801.000000   
mean        0.985019       1.034020  ...    1.163892   68.958801   
std         0.558256       0.788896  ...    1.066737   26.576015   
min         0.000000       0.250000  ...    0.100000    1.000000   
25%         1.000000       0.500000  ...    0.600000   50.000000   
50%         1.000000       1.000000  ...    1.000000   65.000000   
75%         1.000000       1.000000  ...    1.500000   80.000000   
max         4.000000       4.000000  ...   14.500000  255.000000   

       percentage_male  pokedex_number   sp_attack  sp_defense       speed  \
count       801.000000      801.000000  801.000000  801.000000  801.000000   
mean         48.407615      401.000000   71.305868   70.911361   66.334582   
std          26.216655      231.373075   32.353826   27.942501   28.907662   
min           0.000000        1.000000   10.000000   20.000000    5.000000   
25%          50.000000      201.000000   45.000000   50.000000   45.000000   
50%          50.000000      401.000000   65.000000   66.000000   65.000000   
75%          50.000000      601.000000   91.000000   90.000000   85.000000   
max         100.000000      801.000000  194.000000  230.000000  180.000000   

        weight_kg  generation  is_legendary  
count  801.000000  801.000000    801.000000  
mean    61.378105    3.690387      0.087391  
std    107.979179    1.930420      0.282583  
min      0.100000    1.000000      0.000000  
25%      9.300000    2.000000      0.000000  
50%     28.500000    4.000000      0.000000  
75%     61.500000    5.000000      0.000000  
max    999.900000    7.000000      1.000000  

[8 rows x 34 columns]
In [12]:
corr = data.corr()
plt.figure(figsize=(28, 26))
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.show()
C:\Users\Anusha\AppData\Local\Temp\ipykernel_15256\2694556740.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  corr = data.corr()
In [77]:
from scipy.stats import f_oneway

# Load your dataset into a DataFrame (assuming data is your DataFrame)
# data = pd.read_csv('path_to_pokemon_dataset.csv')

# 1. How are the base stats distributed across different Pokémon types?
plt.figure(figsize=(14, 10))
sns.boxplot(x='type1', y='hp', data=data)
plt.title('Distribution of HP across Pokémon Types')
plt.xlabel('Pokémon Type')
plt.ylabel('HP')
plt.xticks(rotation=90)
plt.show()

# 2. Are there any significant differences in base stats between generations?
gen_stats_comparison = {}
for stat in ['hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed']:
    gen_groups = [group[stat] for name, group in data.groupby('generation')]
    f_stat, p_value = f_oneway(*gen_groups)
    gen_stats_comparison[stat] = {'F-statistic': f_stat, 'p-value': p_value}

print("Comparison of Base Stats between Generations:")
for stat, values in gen_stats_comparison.items():
    print(f"{stat}: F-statistic = {values['F-statistic']}, p-value = {values['p-value']}")

# 3. What are the most common primary and secondary types among all Pokémon?
primary_types = data['type1'].value_counts().index[:5]
secondary_types = data['type2'].value_counts().index[:5]

print("Top 5 Most Common Primary Types:", primary_types)
print("Top 5 Most Common Secondary Types:", secondary_types)

# 4. How do the heights and weights of Pokémon vary across different types and generations?
plt.figure(figsize=(14, 6))
sns.scatterplot(x='height_m', y='weight_kg', hue='generation', data=data)
plt.title('Pokémon Heights vs Weights across Generations')
plt.xlabel('Height (m)')
plt.ylabel('Weight (kg)')
plt.legend(title='Generation')
plt.show()

# 5. Are there any patterns or trends in the capture rates of Pokémon?
plt.figure(figsize=(24, 8))
sns.histplot(data['capture_rate'], bins=20, kde=True)
plt.title('Distribution of Pokémon Capture Rates')
plt.xlabel('Capture Rate')
plt.ylabel('Count')
plt.show()

# 6. How do the base stats of legendary Pokémon compare to non-legendary Pokémon?
legendary_stats = data[data['is_legendary'] == True][['hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed']]
non_legendary_stats = data[data['is_legendary'] == False][['hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed']]

print("Legendary Pokémon Stats:")
print(legendary_stats.describe())

print("\nNon-Legendary Pokémon Stats:")
print(non_legendary_stats.describe())

# 7. What are the relationships between different base stats (e.g., HP vs. Defense)?
plt.figure(figsize=(8, 6))
sns.scatterplot(x='hp', y='defense', data=data)
plt.title('HP vs Defense')
plt.xlabel('HP')
plt.ylabel('Defense')
plt.show()
Comparison of Base Stats between Generations:
hp: F-statistic = 1.8411756695267976, p-value = 0.08842313370826954
attack: F-statistic = 2.5088372059173945, p-value = 0.020652498695911767
defense: F-statistic = 0.8310245792815056, p-value = 0.545987632603192
sp_attack: F-statistic = 1.3965441109485413, p-value = 0.21314318986849756
sp_defense: F-statistic = 1.6370798250197902, p-value = 0.13392388768415722
speed: F-statistic = 1.4793080703163086, p-value = 0.1822617696076044
Top 5 Most Common Primary Types: Index(['water', 'normal', 'grass', 'bug', 'psychic'], dtype='object')
Top 5 Most Common Secondary Types: Index(['NIL', 'flying', 'poison', 'ground', 'psychic'], dtype='object')
Legendary Pokémon Stats:
               hp      attack     defense   sp_attack  sp_defense       speed
count   70.000000   70.000000   70.000000   70.000000   70.000000   70.000000
mean    95.428571  109.357143   99.400000  113.757143  101.885714   95.428571
std     30.012351   31.562698   26.929538   34.403587   29.277554   24.671506
min     43.000000   29.000000   31.000000   29.000000   31.000000   37.000000
25%     80.000000   90.000000   89.250000   92.000000   90.000000   85.000000
50%     91.000000  104.000000  100.000000  114.000000  100.000000   98.000000
75%    103.750000  129.750000  115.000000  131.000000  120.000000  108.000000
max    223.000000  181.000000  200.000000  194.000000  200.000000  180.000000

Non-Legendary Pokémon Stats:
               hp      attack     defense   sp_attack  sp_defense       speed
count  731.000000  731.000000  731.000000  731.000000  731.000000  731.000000
mean    66.424077   74.841313   70.481532   67.240766   67.945280   63.548564
std     24.804290   30.576820   29.929838   29.084657   25.958128   27.736456
min      1.000000    5.000000    5.000000   10.000000   20.000000    5.000000
25%     50.000000   53.000000   50.000000   45.000000   50.000000   43.000000
50%     65.000000   70.000000   65.000000   62.000000   65.000000   60.000000
75%     78.000000   95.000000   85.000000   85.000000   85.000000   81.000000
max    255.000000  185.000000  230.000000  175.000000  230.000000  160.000000
In [13]:
# Distribution of base stats
sns.histplot(data['hp'], kde=True)
plt.show()
sns.histplot(data['attack'], kde=True)
plt.show()
sns.histplot(data['defense'], kde=True)
plt.show()
sns.histplot(data['sp_attack'], kde=True)
plt.show()
sns.histplot(data['sp_defense'], kde=True)
plt.show()
In [14]:
# Line plot for average stats by Generation
avg_stats = data.groupby('generation').mean().reset_index()
plt.figure(figsize=(10, 6))
for stat in ['hp', 'attack', 'defense', 'speed', 'sp_attack', 'sp_defense']:
    plt.plot(avg_stats['generation'], avg_stats[stat], label=stat)
plt.legend()
plt.xlabel('Generation')
plt.ylabel('Average Value')
plt.title('Average Stats by Generation')
plt.show()
C:\Users\Anusha\AppData\Local\Temp\ipykernel_15256\1292573428.py:2: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  avg_stats = data.groupby('generation').mean().reset_index()
In [15]:
import pandas.plotting as pd_plotting

# Scatter plot matrix for base stats
pd_plotting.scatter_matrix(data[['hp', 'attack', 'defense', 'speed', 'sp_attack', 'sp_defense']], figsize=(15, 15))
plt.show()
In [16]:
import seaborn as sns
import matplotlib.pyplot as plt

# Pair plot for base stats
sns.pairplot(data[['hp', 'attack', 'defense', 'speed', 'sp_attack', 'sp_defense']])
plt.show()
In [17]:
# Facet Grid for Attack vs. Defense across generations
g = sns.FacetGrid(data, col="generation", col_wrap=3)
g.map(sns.scatterplot, "attack", "defense")
plt.show()
In [25]:
legendary_stats = data[data['is_legendary'] == True][['type1', 'type2']].describe()
non_legendary_stats = data[data['is_legendary'] == False][['type1', 'type2']].describe()
print("Legendary Pokémon Stats:", legendary_stats)
print("Non-Legendary Pokémon Stats:", non_legendary_stats)
Legendary Pokémon Stats:/t           type1 type2
count        70    70
unique       16    15
top     psychic   NIL
freq         17    25
Non-Legendary Pokémon Stats:/t         type1 type2
count     731   731
unique     18    19
top     water   NIL
freq      108   359
In [39]:
generation_counts = data['generation'].value_counts().sort_index()
print("Number of Pokémon per Generation:", generation_counts)

sns.lineplot(x=generation_counts.index, y=generation_counts.values)
plt.title('Poke Count by Generation')
plt.xlabel('Generation')
plt.ylabel('Count')
plt.show()
Number of Pokémon per Generation: 1    151
2    100
3    135
4    107
5    156
6     72
7     80
Name: generation, dtype: int64
In [45]:
correlation = data[['attack', 'speed']].corr()
print("Correlation between Attack and Speed:", correlation)
# Scatter plot for Speed vs. Defense
sns.scatterplot(x='speed', y='attack', data = data)
plt.title('Speed vs. Attack')
plt.xlabel('Speed')
plt.ylabel('Attack')
plt.show()
Correlation between Attack and Speed:           attack     speed
attack  1.000000  0.352703
speed   0.352703  1.000000
In [34]:
water_hp = data[data['type1'] == 'water']['hp'].mean()
other_types_hp = data[data['type1'] != 'water']['hp'].mean()
print("Water Type Average HP:", water_hp)
print("Other Types Average HP:", other_types_hp)
Water Type Average HP: 70.21929824561404
Other Types Average HP: 68.74963609898107
In [38]:
avg_speed_by_gen = data.groupby('generation')['speed'].mean()
print("Average Speed by Generation:\n", avg_speed_by_gen)

# Line plot for Average Speed by Generation
sns.lineplot(x=avg_speed_by_gen.index, y=avg_speed_by_gen.values)
plt.title('Average Speed by Generation')
plt.xlabel('Generation')
plt.ylabel('Average Speed')
plt.show()
Average Speed by Generation:
 generation
1    70.152318
2    61.610000
3    63.577778
4    70.074766
5    66.519231
6    66.652778
7    64.037500
Name: speed, dtype: float64
In [40]:
avg_attack_by_type = data.groupby('type1')['attack'].mean().sort_values(ascending=False)
print("Average Attack by Type:\n", avg_attack_by_type)

# Bar plot for Average Attack by Type
sns.barplot(x=avg_attack_by_type.index, y=avg_attack_by_type.values)
plt.title('Average Attack by Type')
plt.xlabel('Type')
plt.ylabel('Average Attack')
plt.xticks(rotation=90)
plt.show()
Average Attack by Type:
 type1
dragon      106.407407
fighting     99.178571
ground       94.812500
steel        93.083333
rock         90.666667
dark         87.793103
fire         81.500000
normal       75.161905
grass        73.769231
water        73.307018
ice          73.304348
ghost        72.740741
poison       72.656250
electric     70.820513
bug          70.125000
flying       66.666667
psychic      65.566038
fairy        62.111111
Name: attack, dtype: float64
In [44]:
correlation = data[['speed', 'defense']].corr()
print("Correlation between Speed and Defense:\n", correlation)

# Scatter plot for Speed vs. Defense
sns.scatterplot(x='speed', y='defense', data = data)
plt.title('Speed vs. Defense')
plt.xlabel('Speed')
plt.ylabel('Defense')
plt.show()
Correlation between Speed and Defense:
             speed   defense
speed    1.000000  0.007934
defense  0.007934  1.000000
In [47]:
avg_defense_by_type = data.groupby('type1')['defense'].mean().sort_values(ascending=False)
print("Average Defence by Type:\n", avg_defense_by_type)

# Bar plot for Average Attack by Type
sns.barplot(x=avg_defense_by_type.index, y=avg_defense_by_type.values)
plt.title('Average Defense by Type')
plt.xlabel('Type')
plt.ylabel('Average Defense')
plt.xticks(rotation=90)
plt.show()
Average Defence by Type:
 type1
steel       120.208333
rock         96.266667
dragon       86.259259
ground       83.906250
ghost        79.518519
water        73.482456
ice          71.913043
grass        70.871795
bug          70.847222
dark         70.517241
poison       70.031250
psychic      69.264151
fairy        68.166667
fire         67.788462
fighting     66.392857
flying       65.000000
electric     61.820513
normal       59.695238
Name: defense, dtype: float64
In [48]:
avg_hp_by_type = data.groupby('type1')['hp'].mean().sort_values(ascending=False)
print("Average HP by Type:\n", avg_hp_by_type)

# Bar plot for Average Attack by Type
sns.barplot(x=avg_hp_by_type.index, y=avg_hp_by_type.values)
plt.title('Average HP by Type')
plt.xlabel('Type')
plt.ylabel('Average HP')
plt.xticks(rotation=90)
plt.show()
Average HP by Type:
 type1
dragon      79.851852
normal      76.723810
fairy       73.944444
ground      73.187500
psychic     72.943396
dark        72.551724
ice         72.086957
fighting    71.428571
water       70.219298
fire        68.730769
flying      68.000000
steel       66.791667
rock        66.333333
poison      65.593750
grass       65.358974
ghost       63.370370
electric    60.512821
bug         56.722222
Name: hp, dtype: float64
In [53]:
# Average Speed by Type
avg_speed_by_type = data.groupby('type1')['speed'].mean().sort_values(ascending=False)
print("Average Speed by Type:\n", avg_speed_by_type)

# Bar plot for Average Speed by Type
sns.barplot(x=avg_speed_by_type.index, y=avg_speed_by_type.values)
plt.title('Average Speed by Type')
plt.xlabel('Type')
plt.ylabel('Average Speed')
plt.xticks(rotation=90)
plt.show()
Average Speed by Type:
 type1
flying      99.666667
electric    85.410256
dragon      76.111111
dark        75.310345
psychic     75.150943
fire        73.346154
normal      69.533333
fighting    64.285714
poison      64.187500
water       63.921053
bug         63.569444
ice         62.739130
ground      59.968750
grass       59.025641
ghost       58.333333
rock        57.422222
steel       56.583333
fairy       53.666667
Name: speed, dtype: float64
In [52]:
# Average Special Attack by Type
avg_sp_attack_by_type = data.groupby('type1')['sp_attack'].mean().sort_values(ascending=False)
print("Average Special Attack by Type:\n", avg_sp_attack_by_type)

# Bar plot for Average Special Attack by Type
sns.barplot(x=avg_sp_attack_by_type.index, y=avg_sp_attack_by_type.values)
plt.title('Average Special Attack by Type')
plt.xlabel('Type')
plt.ylabel('Average Special Attack')
plt.xticks(rotation=90)
plt.show()
Average Special Attack by Type:
 type1
psychic     92.603774
dragon      89.592593
fire        87.730769
electric    87.538462
flying      84.000000
ghost       82.444444
fairy       81.500000
ice         77.434783
dark        74.517241
grass       74.320513
water       74.061404
steel       72.708333
rock        63.200000
poison      61.562500
normal      56.980952
bug         56.652778
ground      51.937500
fighting    50.107143
Name: sp_attack, dtype: float64
In [51]:
# Average Special Defense by Type
avg_sp_defense_by_type = data.groupby('type1')['sp_defense'].mean().sort_values(ascending=False)
print("Average Special Defense by Type:\n", avg_sp_defense_by_type)

# Bar plot for Average Special Defense by Type
sns.barplot(x=avg_sp_defense_by_type.index, y=avg_sp_defense_by_type.values)
plt.title('Average Special Defense by Type')
plt.xlabel('Type')
plt.ylabel('Average Special Defense')
plt.xticks(rotation=90)
plt.show()
Average Special Defense by Type:
 type1
fairy       87.777778
psychic     85.735849
dragon      84.555556
steel       82.208333
ghost       78.296296
ice         76.130435
rock        73.377778
water       71.798246
fire        71.538462
electric    70.051282
flying      70.000000
grass       69.230769
dark        69.068966
poison      65.531250
fighting    63.428571
normal      63.200000
bug         62.513889
ground      62.281250
Name: sp_defense, dtype: float64
In [64]:
# Define a list of base stats to compare with base happiness
base_stats = ['capture_rate', 'hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed']

# Iterate over the base stats and create correlation and scatter plots
for stat in base_stats:
    # Calculate correlation
    correlation = data[['base_happiness', stat]].corr()
    print(f"Correlation between Base Happiness and {stat.capitalize()}:\n", correlation)
    
    # Scatter plot
    sns.scatterplot(x='base_happiness', y=stat, data=data)
    plt.title(f'Base Happiness vs. {stat.capitalize()}')
    plt.xlabel('Base Happiness')
    plt.ylabel(stat.capitalize())
    plt.show()
C:\Users\Anusha\AppData\Local\Temp\ipykernel_15256\870604065.py:7: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  correlation = data[['base_happiness', stat]].corr()
Correlation between Base Happiness and Capture_rate:
                 base_happiness
base_happiness             1.0
Correlation between Base Happiness and Hp:
                 base_happiness        hp
base_happiness        1.000000 -0.108217
hp                   -0.108217  1.000000
Correlation between Base Happiness and Attack:
                 base_happiness    attack
base_happiness        1.000000 -0.251811
attack               -0.251811  1.000000
Correlation between Base Happiness and Defense:
                 base_happiness   defense
base_happiness        1.000000 -0.191503
defense              -0.191503  1.000000
Correlation between Base Happiness and Sp_attack:
                 base_happiness  sp_attack
base_happiness        1.000000  -0.228924
sp_attack            -0.228924   1.000000
Correlation between Base Happiness and Sp_defense:
                 base_happiness  sp_defense
base_happiness        1.000000   -0.149719
sp_defense           -0.149719    1.000000
Correlation between Base Happiness and Speed:
                 base_happiness     speed
base_happiness        1.000000 -0.148753
speed                -0.148753  1.000000
In [68]:
# Calculate the average base happiness for Legendary and Non-Legendary Pokémon
avg_happiness_legendary = data[data['is_legendary'] == True]['base_happiness'].mean()
avg_happiness_non_legendary = data[data['is_legendary'] == False]['base_happiness'].mean()

print("Average Base Happiness for Legendary Pokémon:", avg_happiness_legendary)
print("Average Base Happiness for Non-Legendary Pokémon:", avg_happiness_non_legendary)

# Bar plot for Average Base Happiness
sns.barplot(x=['Legendary', 'Non-Legendary'], y=[avg_happiness_legendary, avg_happiness_non_legendary])
plt.title('Average Base Happiness: Legendary vs Non-Legendary Pokémon')
plt.xlabel('Category')
plt.ylabel('Average Base Happiness')
plt.show()

# Perform a t-test to check for statistical significance
legendary_happiness = data[data['is_legendary'] == True]['base_happiness']
non_legendary_happiness = data[data['is_legendary'] == False]['base_happiness']

t_stat, p_value = ttest_ind(legendary_happiness, non_legendary_happiness)
print("T-statistic:", t_stat)
print("P-value:", p_value)

if p_value < 0.05:
    print("There is a significant difference in base happiness between Legendary and Non-Legendary Pokémon.")
else:
    print("There is no significant difference in base happiness between Legendary and Non-Legendary Pokémon.")
Average Base Happiness for Legendary Pokémon: 39.214285714285715
Average Base Happiness for Non-Legendary Pokémon: 67.86593707250341
T-statistic: -12.82241778643544
P-value: 2.3225400306230407e-34
There is a significant difference in base happiness between Legendary and Non-Legendary Pokémon.
In [71]:
from scipy.stats import f_oneway

avg_happiness_by_type = data.groupby('type1')['base_happiness'].mean().sort_values()
print("Average Base Happiness by Type:\n", avg_happiness_by_type)

# Bar plot for Average Base Happiness by Type
plt.figure(figsize=(12, 6))
sns.barplot(x=avg_happiness_by_type.index, y=avg_happiness_by_type.values)
plt.title('Average Base Happiness by Pokémon Type')
plt.xlabel('Type')
plt.ylabel('Average Base Happiness')
plt.xticks(rotation=90)
plt.show()

# Perform ANOVA to check for statistical significance
type_groups = [group['base_happiness'].values for name, group in data.groupby('type1')]
f_stat, p_value = f_oneway(*type_groups)
print("F-statistic:", f_stat)
print("P-value:", p_value)

if p_value < 0.05:
    print("There is a significant difference in base happiness among different Pokémon types.")
else:
    print("There is no significant difference in base happiness among different Pokémon types.")
Average Base Happiness by Type:
 type1
dark        42.241379
dragon      42.962963
steel       50.833333
ghost       58.333333
psychic     63.584906
rock        64.555556
ice         65.434783
bug         67.083333
water       67.543860
grass       67.692308
ground      68.437500
fire        68.461538
electric    68.461538
normal      69.571429
fighting    70.000000
poison      70.937500
flying      76.666667
fairy       77.777778
Name: base_happiness, dtype: float64
F-statistic: 7.8233967908132405
P-value: 2.6609058879442524e-18
There is a significant difference in base happiness among different Pokémon types.
In [73]:
from scipy.stats import pearsonr

# Load your dataset into a DataFrame (assuming data is your DataFrame)
# data = pd.read_csv('path_to_pokemon_dataset.csv')

# Compare Experience Growth with Generation
avg_exp_growth_by_generation = data.groupby('generation')['experience_growth'].mean().sort_values()
print("Average Experience Growth by Generation:\n", avg_exp_growth_by_generation)

# Bar plot for Average Experience Growth by Generation
plt.figure(figsize=(12, 6))
sns.barplot(x=avg_exp_growth_by_generation.index, y=avg_exp_growth_by_generation.values)
plt.title('Average Experience Growth by Generation')
plt.xlabel('Generation')
plt.ylabel('Average Experience Growth')
plt.show()

# Compare Experience Growth with Legendary Status
avg_exp_growth_legendary = data[data['is_legendary'] == True]['experience_growth'].mean()
avg_exp_growth_non_legendary = data[data['is_legendary'] == False]['experience_growth'].mean()

print("Average Experience Growth for Legendary Pokémon:", avg_exp_growth_legendary)
print("Average Experience Growth for Non-Legendary Pokémon:", avg_exp_growth_non_legendary)

# Bar plot for Average Experience Growth by Legendary Status
plt.figure(figsize=(8, 6))
sns.barplot(x=['Legendary', 'Non-Legendary'], y=[avg_exp_growth_legendary, avg_exp_growth_non_legendary])
plt.title('Average Experience Growth: Legendary vs Non-Legendary Pokémon')
plt.xlabel('Category')
plt.ylabel('Average Experience Growth')
plt.show()

# Compare Experience Growth with Base Total
plt.figure(figsize=(12, 6))
sns.scatterplot(x='base_total', y='experience_growth', data=data)
plt.title('Experience Growth vs Base Total')
plt.xlabel('Base Total')
plt.ylabel('Experience Growth')
plt.show()

# Calculate and display the correlation coefficient
correlation_exp_growth_base_total = pearsonr(data['base_total'], data['experience_growth'])
print("Correlation between Base Total and Experience Growth:\n", correlation_exp_growth_base_total)
Average Experience Growth by Generation:
 generation
2    1.025866e+06
6    1.050528e+06
1    1.052281e+06
4    1.055664e+06
3    1.058856e+06
5    1.061557e+06
7    1.080352e+06
Name: experience_growth, dtype: float64
Average Experience Growth for Legendary Pokémon: 1241851.142857143
Average Experience Growth for Non-Legendary Pokémon: 1037102.7906976744
Correlation between Base Total and Experience Growth:
 PearsonRResult(statistic=0.2569300707459611, pvalue=1.5293506350014974e-13)
In [74]:
avg_exp_growth_by_type = data.groupby('type1')['experience_growth'].mean().sort_values()
print("Average Experience Growth by Type:\n", avg_exp_growth_by_type)

# Bar plot for Average Experience Growth by Type
plt.figure(figsize=(12, 6))
sns.barplot(x=avg_exp_growth_by_type.index, y=avg_exp_growth_by_type.values)
plt.title('Average Experience Growth by Pokémon Type')
plt.xlabel('Type')
plt.ylabel('Average Experience Growth')
plt.xticks(rotation=90)
plt.show()
Average Experience Growth by Type:
 type1
fairy       9.138889e+05
rock        9.793116e+05
bug         1.009006e+06
normal      1.009973e+06
ghost       1.018117e+06
water       1.056716e+06
fire        1.064735e+06
ground      1.069652e+06
electric    1.073312e+06
fighting    1.076021e+06
psychic     1.079405e+06
grass       1.079791e+06
flying      1.083333e+06
ice         1.086069e+06
poison      1.086849e+06
dark        1.102720e+06
steel       1.126232e+06
dragon      1.216667e+06
Name: experience_growth, dtype: float64
In [78]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report

# Load your dataset into a DataFrame (assuming data is your DataFrame)
# data = pd.read_csv('path_to_pokemon_dataset.csv')

# Select relevant features
features = ['hp', 'attack', 'defense', 'sp_attack', 'sp_defense', 'speed', 'generation', 'base_total']

X = data[features]
y = data['is_legendary']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the XGBoost classifier
xgb_classifier = xgb.XGBClassifier(objective='binary:logistic', random_state=42)

# Train the classifier
xgb_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = xgb_classifier.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Model Performance:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

# Classification report
print("\nClassification Report:")
print(classification_report(y_test, y_pred))
Model Performance:
Accuracy: 0.96
Precision: 0.82
Recall: 0.78
F1 Score: 0.80

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.98      0.98       143
           1       0.82      0.78      0.80        18

    accuracy                           0.96       161
   macro avg       0.90      0.88      0.89       161
weighted avg       0.96      0.96      0.96       161

In [ ]: